Having pre-processed the data, here, I analyse the evolutionary transcriptomics in three brown algal species, across life cycle stages and in different tissues. I use the term TAI often here. It stands for “transcriptome age index” (see the R package myTAI for further details). Note: permutations here are set at 500 rather than 50000 to ensure a fast run.

Evolutionary transcriptome in Fucus embryogenesis

Here, we examine the TAI across the Fucus embryogenesis. We find a developmental hourglass.

library(tidyverse)
library(myTAI)

Load PES

Non-transformed

Fd_PES <-
  readr::read_csv(file = "data/Fd_PES.csv", show_col_types = FALSE)
Fd_PES_F <-
  readr::read_csv(file = "data/Fd_PES_F.csv", show_col_types = FALSE)
Fd_PES_M <-
  readr::read_csv(file = "data/Fd_PES_M.csv", show_col_types = FALSE)
Fs_PES <-
  readr::read_csv(file = "data/Fs_PES.csv", show_col_types = FALSE)
Fs_PES_F <-
  readr::read_csv(file = "data/Fs_PES_F.csv", show_col_types = FALSE)
Fs_PES_M <-
  readr::read_csv(file = "data/Fs_PES_M.csv", show_col_types = FALSE)
Ec_PES_32m <-
  readr::read_csv(file = "data/Ec_PES_32m.csv", show_col_types = FALSE)
Ec_PES_25f <-
  readr::read_csv(file = "data/Ec_PES_25f.csv", show_col_types = FALSE)

sqrt-tranformed

Fd_PES.sqrt <-
  readr::read_csv(file = "data/Fd_PES.sqrt.csv", show_col_types = FALSE)
Fd_PES_F.sqrt <-
  readr::read_csv(file = "data/Fd_PES_F.sqrt.csv", show_col_types = FALSE)
Fd_PES_M.sqrt <-
  readr::read_csv(file = "data/Fd_PES_M.sqrt.csv", show_col_types = FALSE)
Fs_PES.sqrt <-
  readr::read_csv(file = "data/Fs_PES.sqrt.csv", show_col_types = FALSE)
Fs_PES_F.sqrt <-
  readr::read_csv(file = "data/Fs_PES_F.sqrt.csv", show_col_types = FALSE)
Fs_PES_M.sqrt <-
  readr::read_csv(file = "data/Fs_PES_M.sqrt.csv", show_col_types = FALSE)
Ec_PES_32m.sqrt <-
  readr::read_csv(file = "data/Ec_PES_32m.sqrt.csv", show_col_types = FALSE)
Ec_PES_25f.sqrt <-
  readr::read_csv(file = "data/Ec_PES_25f.sqrt.csv", show_col_types = FALSE)

log2-tranformed

Fd_PES.log2 <-
  readr::read_csv(file = "data/Fd_PES.log2.csv", show_col_types = FALSE)
Fd_PES_F.log2 <-
  readr::read_csv(file = "data/Fd_PES_F.log2.csv", show_col_types = FALSE)
Fd_PES_M.log2 <-
  readr::read_csv(file = "data/Fd_PES_M.log2.csv", show_col_types = FALSE)
Fs_PES.log2 <-
  readr::read_csv(file = "data/Fs_PES.log2.csv", show_col_types = FALSE)
Fs_PES_F.log2 <-
  readr::read_csv(file = "data/Fs_PES_F.log2.csv", show_col_types = FALSE)
Fs_PES_M.log2 <-
  readr::read_csv(file = "data/Fs_PES_M.log2.csv", show_col_types = FALSE)
Ec_PES_32m.log2 <-
  readr::read_csv(file = "data/Ec_PES_32m.log2.csv", show_col_types = FALSE)
Ec_PES_25f.log2 <-
  readr::read_csv(file = "data/Ec_PES_25f.log2.csv", show_col_types = FALSE)

rank-tranformed

Fd_PES.rank <-
  readr::read_csv(file = "data/Fd_PES.rank.csv", show_col_types = FALSE)
Fd_PES_F.rank <-
  readr::read_csv(file = "data/Fd_PES_F.rank.csv", show_col_types = FALSE)
Fd_PES_M.rank <-
  readr::read_csv(file = "data/Fd_PES_M.rank.csv", show_col_types = FALSE)
Fs_PES.rank <-
  readr::read_csv(file = "data/Fs_PES.rank.csv", show_col_types = FALSE)
Fs_PES_F.rank <-
  readr::read_csv(file = "data/Fs_PES_F.rank.csv", show_col_types = FALSE)
Fs_PES_M.rank <-
  readr::read_csv(file = "data/Fs_PES_M.rank.csv", show_col_types = FALSE)
Ec_PES_32m.rank <-
  readr::read_csv(file = "data/Ec_PES_32m.rank.csv", show_col_types = FALSE)
Ec_PES_25f.rank <-
  readr::read_csv(file = "data/Ec_PES_25f.rank.csv", show_col_types = FALSE)

rlog-tranformed

Fd_PES.rlog <-
  readr::read_csv(file = "data/Fd_PES.rlog.csv", show_col_types = FALSE)
Fd_PES_F.rlog <-
  readr::read_csv(file = "data/Fd_PES_F.rlog.csv", show_col_types = FALSE)
Fd_PES_M.rlog <-
  readr::read_csv(file = "data/Fd_PES_M.rlog.csv", show_col_types = FALSE)
Fs_PES.rlog <-
  readr::read_csv(file = "data/Fs_PES.rlog.csv", show_col_types = FALSE)
Fs_PES_F.rlog <-
  readr::read_csv(file = "data/Fs_PES_F.rlog.csv", show_col_types = FALSE)
Fs_PES_M.rlog <-
  readr::read_csv(file = "data/Fs_PES_M.rlog.csv", show_col_types = FALSE)
Ec_PES_32m.rlog <-
  readr::read_csv(file = "data/Ec_PES_32m.rlog.csv", show_col_types = FALSE)
Ec_PES_25f.rlog <-
  readr::read_csv(file = "data/Ec_PES_25f.rlog.csv", show_col_types = FALSE)

TAI calculation

This function uses parts of the myTAI::PlotSignature() function.

#TI stands for transcriptome index
TI.preplot <- function(ExpressionSet, permutations = 500){
  std <- 
    myTAI::bootMatrix(
      ExpressionSet = ExpressionSet, 
      permutations  = permutations) %>% 
    apply(2, stats::sd)
  TI.out <- 
    myTAI::TAI(ExpressionSet) %>%
    tibble::as_tibble(rownames = "Stage", colnames = c("TAI", "PS")) %>% 
    dplyr::bind_cols(as_tibble(std)) %>%
    dplyr::rename(TAI = 2, sd = 3)
  return(TI.out)
}

Fucus distichus

# function to process Fd
get_TAI_Fd <- function(PES_all, PES_M, PES_F, ordered_stages){
  TAI_b <- 
    TI.preplot(
      dplyr::select(PES_all, !c("gamete"))) %>%
    dplyr::mutate(Sex = "Mixed")
  stages_to_NA <- # this will differ for Fucus serratus
    ordered_stages[-c(1, 1+1)]
  TAI_M <- 
    TI.preplot(
      PES_M) %>%
    tibble::add_row(
      dplyr::filter(
        dplyr::select(TAI_b, !Sex), 
        Stage == ordered_stages[1+1])) %>%
    tibble::add_row(
      tibble::tibble(Stage = stages_to_NA, TAI = NA, sd = NA)
    ) %>%
    dplyr::mutate(Sex = "Male")
  TAI_F <- 
    TI.preplot(
      PES_F) %>%
    tibble::add_row(
      dplyr::filter(
        dplyr::select(TAI_b, !Sex), 
        Stage == ordered_stages[1+1])) %>%
    tibble::add_row(
      tibble::tibble(Stage = stages_to_NA, TAI = NA, sd = NA)
    ) %>%
    dplyr::mutate(Sex = "Female") # This is not true - this is needed for the plotting.
  TAI_out <- 
    dplyr::bind_rows(TAI_b, TAI_M, TAI_F)
  TAI_out$Stage <- base::factor(TAI_out$Stage, ordered_stages)
  return(TAI_out)
}
ordered_stages <- colnames(Fd_PES)[3:ncol(Fd_PES)]
Fd_TAI <- 
  get_TAI_Fd(
    PES_all = Fd_PES, 
    PES_M = Fd_PES_M, 
    PES_F = Fd_PES_F, 
    ordered_stages)
Fd_TAI.log2 <- 
  get_TAI_Fd(
    PES_all = Fd_PES.log2, 
    PES_M = dplyr::rename(Fd_PES_M.log2, gamete = V1), 
    PES_F = dplyr::rename(Fd_PES_F.log2, gamete = V1), 
    ordered_stages)
Fd_TAI.sqrt <- 
  get_TAI_Fd(
    PES_all = Fd_PES.sqrt, 
    PES_M = dplyr::rename(Fd_PES_M.sqrt, gamete = V1), 
    PES_F = dplyr::rename(Fd_PES_F.sqrt, gamete = V1), 
    ordered_stages)
Fd_TAI.rank <- 
  get_TAI_Fd(
    PES_all = Fd_PES.rank, 
    PES_M = dplyr::rename(Fd_PES_M.rank, gamete = V1), 
    PES_F = dplyr::rename(Fd_PES_F.rank, gamete = V1), 
    ordered_stages)
Fd_TAI.rlog <- 
  get_TAI_Fd(
    PES_all = Fd_PES.rlog, 
    PES_M = Fd_PES_M.rlog, 
    PES_F = Fd_PES_F.rlog, 
    ordered_stages)

Fucus serratus

# function to process Fd.
get_TAI_Fs <- function(PES_all, PES_M, PES_F, ordered_stages){
  TAI_b <- 
    TI.preplot(
      dplyr::select(PES_all, !c("gamete","matSP"))) %>%
    dplyr::mutate(Sex = "Mixed")
  stages_to_NA <- # this will differ for Fucus distichus
    ordered_stages[-c(1, 1+1, length(ordered_stages) - 1, length(ordered_stages))]
  TAI_M <- 
    TI.preplot(
      PES_M) %>%
    tibble::add_row(
      dplyr::filter(
        dplyr::select(TAI_b, !Sex), 
        Stage %in% c(
          ordered_stages[1+1], 
          ordered_stages[length(ordered_stages) - 1]))) %>%
    tibble::add_row(
      tibble::tibble(Stage = stages_to_NA, TAI = NA, sd = NA)
    ) %>%
    dplyr::mutate(Sex = "Male")
  TAI_F <- 
    TI.preplot(
      PES_F) %>%
    tibble::add_row(
      dplyr::filter(
        dplyr::select(TAI_b, !Sex), 
        Stage %in% c(
          ordered_stages[1+1], 
          ordered_stages[length(ordered_stages) - 1]))) %>%
    tibble::add_row(
      tibble::tibble(Stage = stages_to_NA, TAI = NA, sd = NA)
    ) %>%
    dplyr::mutate(Sex = "Female") # This is not true. this is needed for the plotting.
  TAI_out <- 
    dplyr::bind_rows(TAI_b, TAI_M, TAI_F)
  TAI_out$Stage <- base::factor(TAI_out$Stage, ordered_stages)
  return(TAI_out)
}
ordered_stages <- colnames(Fs_PES)[3:ncol(Fs_PES)]
Fs_TAI <- 
  get_TAI_Fs(
    PES_all = Fs_PES, 
    PES_M = Fs_PES_M, 
    PES_F = Fs_PES_F, 
    ordered_stages)
Fs_TAI.log2 <- 
  get_TAI_Fs(
    PES_all = Fs_PES.log2, 
    PES_M = Fs_PES_M.log2, 
    PES_F = Fs_PES_F.log2,
    ordered_stages)
Fs_TAI.sqrt <- 
  get_TAI_Fs(
    PES_all = Fs_PES.sqrt, 
    PES_M = Fs_PES_M.sqrt, 
    PES_F = Fs_PES_F.sqrt, 
    ordered_stages)
Fs_TAI.rank <- 
  get_TAI_Fs(
    PES_all = Fs_PES.rank, 
    PES_M = Fs_PES_M.rank, 
    PES_F = Fs_PES_F.rank, 
    ordered_stages)
Fs_TAI.rlog <- 
  get_TAI_Fs(
    PES_all = Fs_PES.rlog, 
    PES_M = Fs_PES_M.rlog, 
    PES_F = Fs_PES_F.rlog, 
    ordered_stages)
#Fs
Fs_TAI %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus serratus",
       subtitle = "TPM")

Fs_TAI.sqrt %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus serratus",
       subtitle = "sqrt(TPM)")

# save plot
# ggplot2::ggsave(filename = "Fs_TAI_embryo.pdf", device = "pdf", width = 4.5, height = 2.5)


Fs_TAI.log2 %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus serratus",
       subtitle = "log2(TPM+\u03b1)")

Fs_TAI.rank %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus serratus",
       subtitle = "rank(TPM)")

Fs_TAI.rlog %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus serratus",
       subtitle = "rlog(TPM)")

# Fd
Fd_TAI %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus distichus",
       subtitle = "TPM")

Fd_TAI.sqrt %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus distichus",
       subtitle = "sqrt(TPM)")

# save plot
# ggplot2::ggsave(filename = "Fd_TAI_embryo.pdf", device = "pdf", width = 4.5, height = 2.5)


Fd_TAI.log2 %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus distichus",
       subtitle = "log2(TPM+\u03b1)")

Fd_TAI.rank %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus distichus",
       subtitle = "rank(TPM)")

Fd_TAI.rlog %>% 
  dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = 1)) +
  ggplot2::geom_ribbon(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    colour = "#00A087FF", 
    fill = "#00A087FF", 
    alpha = 0.1) +
  ggplot2::geom_line(
    linewidth = 2,
    lineend = "round",
    colour = "#00A087FF") +
  theme_classic() +
  ggsci::scale_colour_npg() +
  labs(title = "Fucus distichus",
       subtitle = "rlog(TPM)")

Statistical significance of TAI profiles

Flat line test

F. serratus
Fs_PES_tS <- 
  tfStability(
    dplyr::select(Fs_PES, !c(gamete, matSP)),
    transforms = c("none", "sqrt", "log2", "rank"),
    permutations = 500
    )
Fs_PES_tS_processed <-
  tibble::as_tibble(Fs_PES_tS, rownames = "transformation")

Fs_PES_tS.rlog <- 
  tfStability(
    dplyr::select(Fs_PES.rlog, !c(gamete, matSP)),
    transforms = c("none"), # it is already transformed.
    permutations = 500
    )
Fs_PES_tS.rlog_processed <-
  tibble::as_tibble(Fs_PES_tS.rlog, rownames = "transformation") %>%
  dplyr::mutate(transformation = "rlog")

Fs_PES_tS_res <- 
  dplyr::bind_rows(Fs_PES_tS_processed, Fs_PES_tS.rlog_processed) %>%
  dplyr::rename("pval" = value) %>%
  dplyr::mutate(test = "FlatLineTest")
F. distichus
Fd_PES_tS <- 
  tfStability(
    dplyr::select(Fd_PES, !c(gamete, matSP)),
    transforms = c("none", "sqrt", "log2", "rank"),
    permutations = 500
    )
Fd_PES_tS_processed <-
  tibble::as_tibble(Fd_PES_tS, rownames = "transformation")

Fd_PES_tS.rlog <- 
  tfStability(
    dplyr::select(Fd_PES.rlog, !c(gamete, matSP)),
    transforms = c("none"), # it is already transformed.
    permutations = 500
    )
Fd_PES_tS.rlog_processed <-
  tibble::as_tibble(Fd_PES_tS.rlog, rownames = "transformation") %>%
  dplyr::mutate(transformation = "rlog")

Fd_PES_tS_res <- 
  dplyr::bind_rows(Fd_PES_tS_processed, Fd_PES_tS.rlog_processed) %>%
  dplyr::rename("pval" = value) %>%
  dplyr::mutate(test = "FlatLineTest")

Plot -log10(p-values)

The flat line test meets the assumptions that V_p can be roughly approximated by a gamma distribution. Here is an example.

FlatLineTest(tf(dplyr::select(Fs_PES, !c(gamete, matSP)), FUN = sqrt), 
             plotHistogram = T,runs = 1, permutations = 3000)
## 
## Total runtime of your permutation test: 0.626  seconds.
## 
## Total runtime of your permutation test: 0.625  seconds.

## $p.value
## [1] 4.802999e-19
## 
## $std.dev
## [1] 0.03578254 0.03280390 0.03623616 0.03800315 0.03454780
## 
## $ks.test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  filtered_vars
## D = 0.024253, p-value = 0.06126
## alternative hypothesis: two-sided
Fucus serratus embryo
Fs_PES_tS_res %>%
  ggplot2::ggplot(
    aes(
      y = -log10(pval), 
      x = transformation)) +
  ggplot2::geom_col(width = 0.05) +
  ggplot2::geom_point(size = 5) +
  ggplot2::geom_hline(
    yintercept = -log10(0.05), 
    colour = "#E64B35FF",
    linetype = 'dashed',
    size = 1
    ) +
  ggplot2::labs(
    title = "Fucus serratus",
    subtitle = "FlatLineTest",
    x = "Transformation"
  ) +
  ggplot2::coord_flip() +
  ggplot2::theme_classic()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# ggplot2::ggsave(filename = "Fs_TAI_embryo_FLT.pdf", device = "pdf", width = 3, height = 2.5)
Fucus distichus embryo
Fd_PES_tS_res %>%
  ggplot2::ggplot(
    aes(
      y = -log10(pval), 
      x = transformation)) +
  ggplot2::geom_col(width = 0.05) +
  ggplot2::geom_point(size = 5) +
  ggplot2::geom_hline(
    yintercept = -log10(0.05), 
    colour = "#E64B35FF",
    linetype = 'dashed',
    size = 1
    ) +
  ggplot2::labs(
    title = "Fucus distichus",
    subtitle = "FlatLineTest",
    x = "Transformation"
  ) +
  ggplot2::coord_flip() +
  ggplot2::theme_classic()

# ggplot2::ggsave(filename = "Fd_TAI_embryo_FLT.pdf", device = "pdf", width = 3, height = 2.5)

Non-flat line profile

The reductive hourglass test meets the assumptions that D_min can be roughly approximated by a normal distribution. Here is an example.

ReductiveHourglassTest(tf(dplyr::select(Fs_PES, !c(gamete, matSP)), FUN = sqrt), 
                       modules = list(early = 1:2, mid = 3:4, late = 5),
                       plotHistogram = T,runs = 1, permutations = 3000)

## $p.value
## [1] 1.307783e-08
## 
## $std.dev
## [1] 0.03508540 0.03207779 0.03611796 0.03777554 0.03440587
## 
## $lillie.test
## [1] NA
Fs_PES_tS <- 
  tfStability(
    dplyr::select(Fs_PES, !c(gamete, matSP)),
    TestStatistic = "ReductiveHourglassTest",
    transforms = c("none", "sqrt", "log2", "rank"),
    modules = list(early = 1:2, mid = 3:4, late = 5),
    permutations = 500
    )
## Proceeding with the ReductiveHourglassTest
Fs_PES_tS_processed <-
  tibble::as_tibble(Fs_PES_tS, rownames = "transformation")

Fs_PES_tS.rlog <- 
  tfStability(
    dplyr::select(Fs_PES.rlog, !c(gamete, matSP)),
    TestStatistic = "ReductiveHourglassTest",
    transforms = c("none"), # it is already transformed.
    modules = list(early = 1:2, mid = 3:4, late = 5),
    permutations = 500
    )
## Proceeding with the ReductiveHourglassTest
Fs_PES_tS.rlog_processed <-
  tibble::as_tibble(Fs_PES_tS.rlog, rownames = "transformation") %>%
  dplyr::mutate(transformation = "rlog")

Fs_PES_tS_res <- 
  dplyr::bind_rows(Fs_PES_tS_processed, Fs_PES_tS.rlog_processed) %>%
  dplyr::rename("pval" = value) %>%
  dplyr::mutate(test = "ReductiveHourglassTest")

Fd_PES_tS <- 
  tfStability(
    dplyr::select(Fd_PES, !c(gamete, matSP)),
    TestStatistic = "ReductiveHourglassTest",
    transforms = c("none", "sqrt", "log2", "rank"),
    modules = list(early = 1:4, mid = 5, late = 6),
    permutations = 500
    )
## Proceeding with the ReductiveHourglassTest
Fd_PES_tS_processed <-
  tibble::as_tibble(Fd_PES_tS, rownames = "transformation")

Fd_PES_tS.rlog <- 
  tfStability(
    dplyr::select(Fd_PES.rlog, !c(gamete, matSP)),
    TestStatistic = "ReductiveHourglassTest",
    transforms = c("none"), # it is already transformed.
    modules = list(early = 1:4, mid = 5, late = 6),
    permutations = 500
    )
## Proceeding with the ReductiveHourglassTest
Fd_PES_tS.rlog_processed <-
  tibble::as_tibble(Fd_PES_tS.rlog, rownames = "transformation") %>%
  dplyr::mutate(transformation = "rlog")

Fd_PES_tS_res <- 
  dplyr::bind_rows(Fd_PES_tS_processed, Fd_PES_tS.rlog_processed) %>%
  dplyr::rename("pval" = value) %>%
  dplyr::mutate(test = "ReductiveHourglassTest")

Plot p-values

Fucus serratus embryo
Fs_PES_tS_res %>%
  ggplot2::ggplot(
    aes(
      y = -log10(pval), 
      x = transformation)) +
  ggplot2::geom_col(width = 0.05) +
  ggplot2::geom_point(size = 5) +
  ggplot2::geom_hline(
    yintercept = -log10(0.05), 
    colour = "#E64B35FF",
    linetype = 'dashed',
    size = 1
    ) +
  ggplot2::labs(
    title = "Fucus serratus",
    subtitle = "ReductiveHourglassTest",
    x = "Transformation"
  ) +
  ggplot2::coord_flip() +
  ggplot2::theme_classic()

# ggplot2::ggsave(filename = "Fs_TAI_embryo_RHT.pdf", device = "pdf", width = 3, height = 2.5)
Fucus distichus embryo
Fd_PES_tS_res %>%
  ggplot2::ggplot(
    aes(
      y = -log10(pval), 
      x = transformation)) +
  ggplot2::geom_col(width = 0.05) +
  ggplot2::geom_point(size = 5) +
  ggplot2::geom_hline(
    yintercept = -log10(0.05), 
    colour = "#E64B35FF",
    linetype = 'dashed',
    size = 1
    ) +
  ggplot2::labs(
    title = "Fucus distichus",
    subtitle = "ReductiveHourglassTest",
    x = "Transformation"
  ) +
  ggplot2::coord_flip() +
  ggplot2::theme_classic()

# ggplot2::ggsave(filename = "Fd_TAI_embryo_RHT.pdf", device = "pdf", width = 3, height = 2.5)

Evolutionary transcriptome in Ectocarpus

Here, we examine the TAI across the life cycle of Ectocarpus for both male and female strains.

# function to process Fd. This will be changed to accommodate more seq data.
get_TAI_Ec <- function(PES_M, PES_F, ordered_stages){
  TAI_M <- 
    TI.preplot(PES_M) %>%
    dplyr::mutate(Sex = "Male")
  TAI_F <- 
    TI.preplot(PES_F) %>%
    dplyr::mutate(Sex = "Female")
  stages_to_NA <- # this will differ for Fucus distichus
    ordered_stages[-c(1, 1+1, length(ordered_stages) - 1, length(ordered_stages))]
  TAI_out <- 
    dplyr::bind_rows(TAI_M, TAI_F)
  TAI_out$Stage <- base::factor(TAI_out$Stage, ordered_stages)
  return(TAI_out)
}
ordered_stages <- colnames(Ec_PES_25f)[3:ncol(Ec_PES_25f)]

Ec_TAI <- 
  get_TAI_Ec(
    PES_M = Ec_PES_32m, 
    PES_F = Ec_PES_25f, 
    ordered_stages)

Ec_TAI.sqrt <- 
  get_TAI_Ec(
    PES_M = Ec_PES_32m.sqrt, 
    PES_F = Ec_PES_25f.sqrt, 
    ordered_stages)

Ec_TAI.log2 <- 
  get_TAI_Ec(
    PES_M = Ec_PES_32m.log2, 
    PES_F = Ec_PES_25f.log2, 
    ordered_stages)

Ec_TAI.rank <- 
  get_TAI_Ec(
    PES_M = Ec_PES_32m.rank, 
    PES_F = Ec_PES_25f.rank, 
    ordered_stages)

Ec_TAI.rlog <- 
  get_TAI_Ec(
    PES_M = Ec_PES_32m.rlog, 
    PES_F = Ec_PES_25f.rlog, 
    ordered_stages)
Ec_TAI %>% 
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = Sex,
      colour = Sex,
      fill = Sex)) +
  ggplot2::geom_line(alpha = 0.4, linewidth = 1) +
  ggplot2::geom_crossbar(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    alpha = 0.5,
    position = ggplot2::position_dodge(0.55), 
    width = 0.5) +
  theme_classic() +
  ggsci::scale_colour_npg() +
  ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge=2)) +
  labs(title = "Ectocarpus",
       subtitle = "TPM")

# ggplot2::ggsave(filename = "Ec_TAI_tpm.pdf", device = "pdf", width = 6, height = 4)

Ec_TAI.sqrt %>% 
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = Sex,
      colour = Sex,
      fill = Sex)) +
  ggplot2::geom_line(alpha = 0.4, linewidth = 1) +
  ggplot2::geom_crossbar(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    alpha = 0.5,
    position = ggplot2::position_dodge(0.55), 
    width = 0.5) +
  theme_classic() +
  ggsci::scale_colour_npg() +
  ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge=2)) +
  labs(title = "Ectocarpus",
       subtitle = "sqrt(TPM)")

# ggplot2::ggsave(filename = "Ec_TAI_sqrt.pdf", device = "pdf", width = 6, height = 4)

Ec_TAI.log2 %>% 
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = Sex,
      colour = Sex,
      fill = Sex)) +
  ggplot2::geom_line(alpha = 0.4, linewidth = 1) +
  ggplot2::geom_crossbar(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    alpha = 0.5,
    position = ggplot2::position_dodge(0.55), 
    width = 0.5) +
  theme_classic() +
  ggsci::scale_colour_npg() +
  ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge=2)) +
  labs(title = "Ectocarpus",
       subtitle = "log2(TPM+\u03b1)")

# ggplot2::ggsave(filename = "Ec_TAI_log2.pdf", device = "pdf", width = 6, height = 4)

Ec_TAI.rank %>% 
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = Sex,
      colour = Sex,
      fill = Sex)) +
  ggplot2::geom_line(alpha = 0.4, linewidth = 1) +
  ggplot2::geom_crossbar(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    alpha = 0.5,
    position = ggplot2::position_dodge(0.55), 
    width = 0.5) +
  theme_classic() +
  ggsci::scale_colour_npg() +
  ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge=2)) +
  labs(title = "Ectocarpus",
       subtitle = "rank(TPM)")

# ggplot2::ggsave(filename = "Ec_TAI_rank.pdf", device = "pdf", width = 6, height = 4)

Ec_TAI.rlog %>% 
  ggplot2::ggplot(
    aes(
      y = TAI, 
      x = Stage,
      group = Sex,
      colour = Sex,
      fill = Sex)) +
  ggplot2::geom_line(alpha = 0.4, linewidth = 1) +
  ggplot2::geom_crossbar(
    aes(ymin = TAI - sd,
        ymax = TAI + sd
        ), 
    alpha = 0.5,
    position = ggplot2::position_dodge(0.55), 
    width = 0.5) +
  theme_classic() +
  ggsci::scale_colour_npg() +
  ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge=2)) +
  labs(title = "Ectocarpus",
       subtitle = "rlog(TPM)")

# ggplot2::ggsave(filename = "Ec_TAI_rlog.pdf", device = "pdf", width = 6, height = 4)

The r-log is behaving weird because this non-linear transformation does not preserve the monotonic relationship.

Statistical significance of TAI profiles

Flat line test

The flat line test meets the assumptions that the V_p can be roughly approximated by a gamma distribution. Here is an example.

FlatLineTest(tf(Ec_PES_25f, FUN = sqrt), plotHistogram = T,runs = 1, permutations = 3000)
## 
## Total runtime of your permutation test: 1  seconds.
## 
## Total runtime of your permutation test: 1.029  seconds.

## $p.value
## [1] 2.368153e-29
## 
## $std.dev
## [1] 0.06746062 0.04727504 0.04515307 0.04023302 0.06923341 0.04028872 0.04750110
## [8] 0.03667546 0.08830003
## 
## $ks.test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  filtered_vars
## D = 0.015775, p-value = 0.4513
## alternative hypothesis: two-sided
# female
Ec_PES_25f_tS <- 
  tfStability(
    Ec_PES_25f,
    transforms = c("none", "sqrt", "log2", "rank"),
    permutations = 500
    )
Ec_PES_25f_tS_processed <-
  tibble::as_tibble(Ec_PES_25f_tS, rownames = "transformation")

Ec_PES_25f_tS.rlog <- 
  tfStability(
    Ec_PES_25f.rlog,
    transforms = c("none"), # it is already transformed.
    permutations = 500
    )
Ec_PES_25f_tS.rlog_processed <-
  tibble::as_tibble(Ec_PES_25f_tS.rlog, rownames = "transformation") %>%
  dplyr::mutate(transformation = "rlog")

Ec_PES_25f_tS_res <- 
  dplyr::bind_rows(Ec_PES_25f_tS_processed, Ec_PES_25f_tS.rlog_processed) %>%
  dplyr::rename("pval" = value) %>%
  dplyr::mutate(test = "FlatLineTest")

# male
Ec_PES_32m_tS <- 
  tfStability(
    Ec_PES_32m,
    transforms = c("none", "sqrt", "log2", "rank"),
    permutations = 500
    )
Ec_PES_32m_tS_processed <-
  tibble::as_tibble(Ec_PES_32m_tS, rownames = "transformation")

Ec_PES_32m_tS.rlog <- 
  tfStability(
    Ec_PES_32m.rlog,
    transforms = c("none"), # it is already transformed.
    permutations = 500
    )
Ec_PES_32m_tS.rlog_processed <-
  tibble::as_tibble(Ec_PES_32m_tS.rlog, rownames = "transformation") %>%
  dplyr::mutate(transformation = "rlog")

Ec_PES_32m_tS_res <- 
  dplyr::bind_rows(Ec_PES_32m_tS_processed, Ec_PES_32m_tS.rlog_processed) %>%
  dplyr::rename("pval" = value) %>%
  dplyr::mutate(test = "FlatLineTest")

Plot -log10(p-values)

Ec_PES_32m_tS_res %>%
  ggplot2::ggplot(
    aes(
      y = -log10(pval), 
      x = transformation)) +
  ggplot2::geom_col(width = 0.05) +
  ggplot2::geom_point(size = 5) +
  ggplot2::geom_hline(
    yintercept = -log10(0.05), 
    colour = "#E64B35FF",
    linetype = 'dashed',
    size = 1
    ) +
  ggplot2::labs(
    title = "Ectocarpus (male)",
    subtitle = "FlatLineTest",
    x = "Transformation"
  ) +
  ggplot2::coord_flip() +
  ggplot2::theme_classic()

# ggplot2::ggsave(filename = "Ec_TAI_M_FLT.pdf", device = "pdf", width = 3, height = 2.5)

Ec_PES_25f_tS_res %>%
  ggplot2::ggplot(
    aes(
      y = -log10(pval), 
      x = transformation)) +
  ggplot2::geom_col(width = 0.05) +
  ggplot2::geom_point(size = 5) +
  ggplot2::geom_hline(
    yintercept = -log10(0.05), 
    colour = "#E64B35FF",
    linetype = 'dashed',
    size = 1
    ) +
  ggplot2::labs(
    title = "Ectocarpus (female)",
    subtitle = "FlatLineTest",
    x = "Transformation"
  ) +
  ggplot2::coord_flip() +
  ggplot2::theme_classic()

# ggplot2::ggsave(filename = "Ec_TAI_F_FLT.pdf", device = "pdf", width = 3, height = 2.5)

Evolutionary transcriptome across Fucus tissues

TAI calculation

Here, I assess the TAI profiles across adult (matSP) tissues of F. serratus and F. distichus. Note, I use the square-root transformation in the paper as it strikes a balance between the log and non transformation in terms of variance stabilisation and reduction of the effect of noise in lowly expressed genes. But for completeness, I also show the profiles under various other transformations.

library(tidyverse)
library(myTAI)

Load PES

Non-transformed

Fd_PES_matSP <-
  readr::read_csv(file = "data/Fd_PES_matSP.csv", show_col_types = FALSE)
Fs_PES_F_matSP <-
  readr::read_csv(file = "data/Fs_PES_F_matSP.csv", show_col_types = FALSE)
Fs_PES_M_matSP <-
  readr::read_csv(file = "data/Fs_PES_M_matSP.csv", show_col_types = FALSE)

sqrt-tranformed

Fd_PES_matSP.sqrt <-
  readr::read_csv(file = "data/Fd_PES_matSP.sqrt.csv", show_col_types = FALSE)
Fs_PES_F_matSP.sqrt <-
  readr::read_csv(file = "data/Fs_PES_F_matSP.sqrt.csv", show_col_types = FALSE)
Fs_PES_M_matSP.sqrt <-
  readr::read_csv(file = "data/Fs_PES_M_matSP.sqrt.csv", show_col_types = FALSE)

log2-tranformed

Fd_PES_matSP.log2 <-
  readr::read_csv(file = "data/Fd_PES_matSP.log2.csv", show_col_types = FALSE)
Fs_PES_F_matSP.log2 <-
  readr::read_csv(file = "data/Fs_PES_F_matSP.log2.csv", show_col_types = FALSE)
Fs_PES_M_matSP.log2 <-
  readr::read_csv(file = "data/Fs_PES_M_matSP.log2.csv", show_col_types = FALSE)

rank-tranformed

Fd_PES_matSP.rank <-
  readr::read_csv(file = "data/Fd_PES_matSP.rank.csv", show_col_types = FALSE)
Fs_PES_F_matSP.rank <-
  readr::read_csv(file = "data/Fs_PES_F_matSP.rank.csv", show_col_types = FALSE)
Fs_PES_M_matSP.rank <-
  readr::read_csv(file = "data/Fs_PES_M_matSP.rank.csv", show_col_types = FALSE)

rlog-tranformed

Fd_PES_matSP.rlog <-
  readr::read_csv(file = "data/Fd_PES_matSP.rlog.csv", show_col_types = FALSE)
Fs_PES_M_matSP.rlog <-
  readr::read_csv(file = "data/Fs_PES_M_matSP.rlog.csv", show_col_types = FALSE)
Fs_PES_F_matSP.rlog <-
  readr::read_csv(file = "data/Fs_PES_F_matSP.rlog.csv", show_col_types = FALSE)

TAI calculation

#TI stands for transcriptome index
TI.preplot <- function(ExpressionSet, permutations = 500){
  std <- 
    myTAI::bootMatrix(
      ExpressionSet = ExpressionSet, 
      permutations  = permutations) %>% 
    apply(2, stats::sd)
  TI.out <- 
    myTAI::TAI(ExpressionSet) %>%
    tibble::as_tibble(rownames = "Tissue", colnames = c("TAI", "PS")) %>% 
    dplyr::bind_cols(as_tibble(std)) %>%
    dplyr::rename(TAI = 2, sd = 3)
  return(TI.out)
}
# no transformation
Fd_TAI_matSP <- 
  TI.preplot(Fd_PES_matSP)
Fs_TAI_F_matSP <- 
  TI.preplot(Fs_PES_F_matSP) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M_matSP <- 
  TI.preplot(Fs_PES_M_matSP) %>%
  dplyr::mutate(Sex = "M")

# sqrt
Fd_TAI_matSP.sqrt <- 
  TI.preplot(Fd_PES_matSP.sqrt)
Fs_TAI_F_matSP.sqrt <- 
  TI.preplot(Fs_PES_F_matSP.sqrt) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M_matSP.sqrt <- 
  TI.preplot(Fs_PES_M_matSP.sqrt) %>%
  dplyr::mutate(Sex = "M")

# log2
Fd_TAI_matSP.log2 <- 
  TI.preplot(Fd_PES_matSP.log2)
Fs_TAI_F_matSP.log2 <- 
  TI.preplot(Fs_PES_F_matSP.log2) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M_matSP.log2 <- 
  TI.preplot(Fs_PES_M_matSP.log2) %>%
  dplyr::mutate(Sex = "M")

# rank
Fd_TAI_matSP.rank <- 
  TI.preplot(Fd_PES_matSP.rank)
Fs_TAI_F_matSP.rank <- 
  TI.preplot(Fs_PES_F_matSP.rank) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M_matSP.rank <- 
  TI.preplot(Fs_PES_M_matSP.rank) %>%
  dplyr::mutate(Sex = "M")

# rlog
Fd_TAI_matSP.rlog <- 
  TI.preplot(Fd_PES_matSP.rlog)
Fs_TAI_F_matSP.rlog <- 
  TI.preplot(Fs_PES_F_matSP.rlog) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M_matSP.rlog <- 
  TI.preplot(Fs_PES_M_matSP.rlog) %>%
  dplyr::mutate(Sex = "M")
plot_tissue_Fd <- function(preplot_out){
  plot_out <- 
    preplot_out %>%
    ggplot2::ggplot(
      aes(
        x = "Mixed",
        y = TAI,
        group = Tissue
        )
      ) +
    ggplot2::geom_point(
      size = 4, 
      position = ggplot2::position_dodge(width=0.9),
      colour = "#00A087FF") +
    ggplot2::geom_errorbar(
      aes(ymin = TAI - sd, ymax = TAI + sd),
      width = 0.1,
      position = ggplot2::position_dodge(width=0.9),
      colour = "#00A087FF") +
    ggplot2::geom_text(
      aes(label = Tissue),
      vjust = -0.9,
      angle = 90,
      position = ggplot2::position_dodge(width = 0.9)) +
    ggplot2::labs(
      x = "Sex",
      title = "Fucus distichus \n(matSP)"
    ) +
    ggplot2::theme_classic()
  return(plot_out)
}

plot_tissue_Fs <- function(preplot_out){
  plot_out <- 
    preplot_out %>%
    ggplot2::ggplot(
      aes(
        x = Sex,
        y = TAI,
        group = Tissue,
        colour = Sex
        )
      ) +
    ggplot2::geom_point(
      size = 4, 
      position = ggplot2::position_dodge(width=0.9)) +
    ggplot2::geom_errorbar(
      aes(ymin = TAI - sd, ymax = TAI + sd),
      width = 0.1,
      position = ggplot2::position_dodge(width=0.9)) +
    ggplot2::geom_text(
      aes(label = Tissue),
      vjust = -0.9,
      angle = 90,
      position = ggplot2::position_dodge(width = 0.9),
      colour = "black") +
    ggplot2::scale_colour_manual(
      values = c("#E64B35FF", "#4DBBD5FF"), 
      guide = "none") +
    ggplot2::labs(
      x = "Sex",
      title = "Fucus serratus \n(matSP)",
      colour = NULL
    ) +
    ggplot2::theme_classic()
  return(plot_out)
}
# https://stackoverflow.com/questions/35654364/ggplot-jitter-geom-errorbar
# `ggplot2::position_jitter(seed = 1)` can also be used.
# https://stackoverflow.com/questions/56816072/position-dodge-and-nudge-y-together
# For colours:
# scales::show_col(ggsci::pal_npg("nrc")(9))
plot_tissue_Fd(Fd_TAI_matSP) + 
  ggplot2::labs(subtitle = "TPM")

plot_tissue_Fd(Fd_TAI_matSP.sqrt) + 
  ggplot2::labs(subtitle = "sqrt(TPM)")

# save sqrt
# ggplot2::ggsave(filename = "Fd_TAI_tissue.pdf", device = "pdf", width = 2, height = 4)
plot_tissue_Fd(Fd_TAI_matSP.log2) + 
  ggplot2::labs(subtitle = "log2(TPM+1)")

plot_tissue_Fd(Fd_TAI_matSP.rank) + 
  ggplot2::labs(subtitle = "rank(TPM)")

plot_tissue_Fd(Fd_TAI_matSP.rlog) + 
  ggplot2::labs(subtitle = "rlog(TPM)")

base::rbind(Fs_TAI_F_matSP, Fs_TAI_M_matSP) %>%
  plot_tissue_Fs() + 
  ggplot2::labs(subtitle = "TPM")

base::rbind(Fs_TAI_F_matSP.sqrt, Fs_TAI_M_matSP.sqrt) %>%
  plot_tissue_Fs() + 
  ggplot2::labs(subtitle = "sqrt(TPM)")

# save sqrt
# ggplot2::ggsave(filename = "Fs_TAI_tissue.pdf", device = "pdf", width = 3, height = 4)

base::rbind(Fs_TAI_F_matSP.log2, Fs_TAI_M_matSP.log2) %>%
  plot_tissue_Fs() + 
  ggplot2::labs(subtitle = "log2(TPM)")

base::rbind(Fs_TAI_F_matSP.rank, Fs_TAI_M_matSP.rank) %>%
  plot_tissue_Fs() + 
  ggplot2::labs(subtitle = "rank(TPM)")

base::rbind(Fs_TAI_F_matSP.rlog, Fs_TAI_M_matSP.rlog) %>%
  plot_tissue_Fs() + 
  ggplot2::labs(subtitle = "rlog(TPM)")

FLT of permutation statistics between males and females

# calculate p-values based on the flat line test, then adjust for multiple comparisons.
hack_flt <- function(PES_m, PES_f, permutations = 500, tr_name = "", padj = F){
  
  pval_df <- data.frame()
  
  for(i in colnames(PES_m)[-c(1,2)]){
    join_test <- dplyr::left_join(
      dplyr::select(PES_m, 1, 2, starts_with(i)),
      dplyr::select(PES_f, 1, 2, starts_with(i)),
      by = c("GeneID", "PS"))
    # join_test_df <- rbind(join_test)
    # where x is male and y is female
    pval <- myTAI::FlatLineTest(join_test, permutations = permutations)
    pval_df <- 
      dplyr::bind_rows(
        pval_df, 
        tibble::tibble(
          tissue = i, 
          p.value = pval$p.value,
          transformation = tr_name))
  }
  
  pval_df$tissue <- base::factor(pval_df$tissue, colnames(PES_m)[-c(1,2)])
  
  if(!padj){return(pval_df)}
  
  # to adjust p values
  p_value <- pval_df$p.value

  # Apply Bonferroni correction to the p-value
  p_adjusted <- p.adjust(p_value, method = "bonferroni")
  
  pval_df$padj <- p_adjusted
  
  return(pval_df)
}
# check with myTAI function

male_test <- dplyr::select(Fs_PES_M_matSP.sqrt, c(1:2, 3))
female_test <- dplyr::select(Fs_PES_F_matSP.sqrt, c(1:2, 3))

join_test <- dplyr::left_join(male_test, female_test, by = c("GeneID", "PS")) %>%
# where x is male and y is female
        dplyr::left_join(male_test, by = c("GeneID", "PS"))
# Since `Error: Intersecting modules are not defined for the ReductiveHourglassTest.`

myTAI::ReductiveHourglassTest(join_test,
                              permutations = 500,
                              modules = list(early = 1,mid = 2,late =3))
## $p.value
## [1] 0.02531625
## 
## $std.dev
## [1] 0.05460876 0.05699125 0.05460876
## 
## $lillie.test
## [1] NA
# calculate p-values based on the reductive hourglass test, then adjust for multiple comparisons.
hack_rht <- function(PES_m, PES_f, permutations = 500, tr_name = "", padj = F){
  
  pval_df <- data.frame()
  
  for(i in colnames(PES_m)[-c(1,2)]){
    join_test <- dplyr::left_join(
        dplyr::select(PES_m, 1, 2, starts_with(i)),
        dplyr::select(PES_f, 1, 2, starts_with(i)),
        by = c("GeneID", "PS")) %>%
      dplyr::left_join(dplyr::select(PES_m, 1, 2, starts_with(i)),
        by = c("GeneID", "PS"))
    # join_test_df <- rbind(join_test)
    # where x is male and y is female ... and z is male (due to intersecting module error)
    pval <- myTAI::ReductiveHourglassTest(
            join_test, 
            permutations = permutations, 
            modules = list(early = 1,mid = 2,late =3))
    pval_df <- 
      dplyr::bind_rows(
        pval_df, 
        tibble::tibble(
          tissue = i, 
          p.value = pval$p.value,
          transformation = tr_name))
  }
  
  pval_df$tissue <- base::factor(pval_df$tissue, colnames(PES_m)[-c(1,2)])
  
  if(!padj){return(pval_df)}
  
  # to adjust p values
  p_value <- pval_df$p.value

  # Apply Bonferroni correction to the p-value
  p_adjusted <- p.adjust(p_value, method = "bonferroni")
  
  pval_df$padj <- p_adjusted
  
  return(pval_df)
}
hack_rht(Fs_PES_M_matSP.sqrt, Fs_PES_F_matSP.sqrt, tr_name = "sqrt")
hack_rht(Fs_PES_M_matSP.sqrt, Fs_PES_F_matSP.sqrt, tr_name = "sqrt", padj = T)
permutations = 500

Fs_PES_perm <-
  dplyr::bind_rows(
    hack_flt(Fs_PES_M_matSP, Fs_PES_F_matSP, tr_name = "none", padj = T),
    hack_flt(Fs_PES_M_matSP.sqrt, Fs_PES_F_matSP.sqrt, tr_name = "sqrt", padj = T),
    hack_flt(Fs_PES_M_matSP.log2, Fs_PES_F_matSP.log2, tr_name = "log2", padj = T),
    hack_flt(Fs_PES_M_matSP.rank, Fs_PES_F_matSP.rank, tr_name = "rank", padj = T),
    hack_flt(Fs_PES_M_matSP.rlog, Fs_PES_F_matSP.rlog, tr_name = "rlog", padj = T)
  ) %>%
  dplyr::mutate(test = "permutation test")
## 
## Total runtime of your permutation test: 0.099  seconds.
## Total runtime of your permutation test: 0.099  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.099  seconds.
## Total runtime of your permutation test: 0.099  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.1  seconds.
## Total runtime of your permutation test: 0.099  seconds.
## Total runtime of your permutation test: 0.099  seconds.
## Total runtime of your permutation test: 0.099  seconds.
## Total runtime of your permutation test: 0.099  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.098  seconds.
## Total runtime of your permutation test: 0.098  seconds.
alpha = 2.22e-16 # so we do not get infinite values

Fs_PES_perm %>%
  ggplot2::ggplot(
    aes(
      y = -log10(padj + alpha),
      x = tissue,
      group = transformation,
      linetype = transformation)) +
  ggplot2::geom_line(width = 0.05, alpha = 0.5) +
  ggplot2::geom_point(size = 5) +
  ggplot2::geom_hline(
    yintercept = -log10(0.05),
    colour = "#E64B35FF",
    linetype = 'dashed',
    size = 1
    ) +
  ggplot2::labs(
    title = "Fucus serratus",
    subtitle = "Permutation test; TAI male != female",
    x = "Tissue",
    y = "-log10(p_adjusted)"
  ) +
  ggplot2::coord_flip() +
  ggplot2::theme_classic()
## Warning in ggplot2::geom_line(width = 0.05, alpha = 0.5): Ignoring unknown
## parameters: `width`

Fs_PES_perm %>%
  dplyr::mutate(
    pval_label = case_when(
      padj > 0.05 ~ "ns",
      padj <= 0.05 & padj > 0.01 ~ "*",
      padj <= 0.01 & padj > 0.001 ~ "**",
      padj <= 0.001 ~ "***",
      .default = NA
    )
  ) %>%
  ggplot2::ggplot(
    aes(
      y = tissue,
      label = pval_label,
      x = transformation,
      fill = -log10(padj + alpha))) +
  ggplot2::geom_tile(colour = "black", size = 1) +
  ggplot2::geom_text() +
  ggplot2::theme_void() +
  ggplot2::theme(legend.position = "",
        axis.title.y = element_text(angle = 90),
        axis.text.y = element_text(),
        axis.text.x = element_text()) +
  ggplot2::coord_flip() +
  ggplot2::scale_fill_steps2(limits = c(0,3),
                             low = "white", high = "#E64B35FF") +
  ggplot2::labs(title = "Fucus serratus",
                subtitle = "Male TAI & female TAI differ?")

If we check whether males are younger than females (TAI)
permutations = 500

Fs_PES_perm <-
  dplyr::bind_rows(
    hack_rht(Fs_PES_M_matSP, Fs_PES_F_matSP, tr_name = "none", padj = T),
    hack_rht(Fs_PES_M_matSP.sqrt, Fs_PES_F_matSP.sqrt, tr_name = "sqrt", padj = T),
    hack_rht(Fs_PES_M_matSP.log2, Fs_PES_F_matSP.log2, tr_name = "log2", padj = T),
    hack_rht(Fs_PES_M_matSP.rank, Fs_PES_F_matSP.rank, tr_name = "rank", padj = T),
    hack_rht(Fs_PES_M_matSP.rlog, Fs_PES_F_matSP.rlog, tr_name = "rlog", padj = T)
  ) %>%
  dplyr::mutate(test = "permutation test")
Fs_PES_perm %>%
  dplyr::mutate(
    pval_label = case_when(
      padj > 0.05 ~ "ns",
      padj <= 0.05 & padj > 0.01 ~ "*",
      padj <= 0.01 & padj > 0.001 ~ "**",
      padj <= 0.001 ~ "***",
      .default = NA
    )
  ) %>%
  ggplot2::ggplot(
    aes(
      y = tissue,
      label = pval_label,
      x = transformation,
      fill = -log10(padj + alpha))) +
  ggplot2::geom_tile(colour = "black", size = 1) +
  ggplot2::geom_text() +
  ggplot2::theme_void() +
  ggplot2::theme(legend.position = "",
        axis.title.y = element_text(angle = 90),
        axis.text.y = element_text(),
        axis.text.x = element_text()) +
  ggplot2::coord_flip() +
  ggplot2::scale_fill_steps2(limits = c(0,3),
                             low = "white", high = "#E64B35FF") +
  ggplot2::labs(title = "Fucus serratus",
                subtitle = "Male TAI > female TAI?")

# ggplot2::ggsave(filename = "Fs_PES_perm.pdf", device = "pdf", width = 3, height = 2)

Add gamete data in plot

Non-transformed

Fd_PES_M <-
  readr::read_csv(file = "data/Fd_PES_M.csv")
Fd_PES_F <-
  readr::read_csv(file = "data/Fd_PES_F.csv")
Fs_PES_F <-
  readr::read_csv(file = "data/Fs_PES_F.csv") %>% dplyr::select(-matSP)
Fs_PES_M <-
  readr::read_csv(file = "data/Fs_PES_M.csv") %>% dplyr::select(-matSP)

# sqrt
Fd_PES_M.sqrt <-
  readr::read_csv(file = "data/Fd_PES_M.sqrt.csv") %>% dplyr::rename(gamete = V1)
Fd_PES_F.sqrt <-
  readr::read_csv(file = "data/Fd_PES_F.sqrt.csv") %>% dplyr::rename(gamete = V1)
Fs_PES_F.sqrt <-
  readr::read_csv(file = "data/Fs_PES_F.sqrt.csv") %>% dplyr::select(-matSP)
Fs_PES_M.sqrt <-
  readr::read_csv(file = "data/Fs_PES_M.sqrt.csv") %>% dplyr::select(-matSP)

# log2
Fd_PES_M.log2 <-
  readr::read_csv(file = "data/Fd_PES_M.log2.csv") %>% dplyr::rename(gamete = V1)
Fd_PES_F.log2 <-
  readr::read_csv(file = "data/Fd_PES_F.log2.csv") %>% dplyr::rename(gamete = V1)
Fs_PES_F.log2 <-
  readr::read_csv(file = "data/Fs_PES_F.log2.csv") %>% dplyr::select(-matSP)
Fs_PES_M.log2 <-
  readr::read_csv(file = "data/Fs_PES_M.log2.csv") %>% dplyr::select(-matSP)

# rank
Fd_PES_M.rank <-
  readr::read_csv(file = "data/Fd_PES_M.rank.csv") %>% dplyr::rename(gamete = V1)
Fd_PES_F.rank <-
  readr::read_csv(file = "data/Fd_PES_F.rank.csv") %>% dplyr::rename(gamete = V1)
Fs_PES_F.rank <-
  readr::read_csv(file = "data/Fs_PES_F.rank.csv") %>% dplyr::select(-matSP)
Fs_PES_M.rank <-
  readr::read_csv(file = "data/Fs_PES_M.rank.csv") %>% dplyr::select(-matSP)

# rlog
Fd_PES_M.rlog <-
  readr::read_csv(file = "data/Fd_PES_M.rlog.csv")
Fd_PES_F.rlog <-
  readr::read_csv(file = "data/Fd_PES_F.rlog.csv")
Fs_PES_F.rlog <-
  readr::read_csv(file = "data/Fs_PES_F.rlog.csv") %>% dplyr::select(-matSP)
Fs_PES_M.rlog <-
  readr::read_csv(file = "data/Fs_PES_M.rlog.csv") %>% dplyr::select(-matSP)
Fd_TAI_F <- 
  TI.preplot(Fd_PES_F) %>%
  dplyr::mutate(Sex = "F")
Fd_TAI_M <- 
  TI.preplot(Fd_PES_M) %>%
  dplyr::mutate(Sex = "M")
Fs_TAI_F <- 
  TI.preplot(Fs_PES_F) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M <- 
  TI.preplot(Fs_PES_M) %>%
  dplyr::mutate(Sex = "M")

# sqrt
Fd_TAI_F.sqrt <- 
  TI.preplot(Fd_PES_F.sqrt) %>%
  dplyr::mutate(Sex = "F")
Fd_TAI_M.sqrt <- 
  TI.preplot(Fd_PES_M.sqrt) %>%
  dplyr::mutate(Sex = "M")
Fs_TAI_F.sqrt <- 
  TI.preplot(Fs_PES_F.sqrt) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M.sqrt <- 
  TI.preplot(Fs_PES_M.sqrt) %>%
  dplyr::mutate(Sex = "M")

# log2
Fd_TAI_F.log2 <- 
  TI.preplot(Fd_PES_F.log2) %>%
  dplyr::mutate(Sex = "F")
Fd_TAI_M.log2 <- 
  TI.preplot(Fd_PES_M.log2) %>%
  dplyr::mutate(Sex = "M")
Fs_TAI_F.log2 <- 
  TI.preplot(Fs_PES_F.log2) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M.log2 <- 
  TI.preplot(Fs_PES_M.log2) %>%
  dplyr::mutate(Sex = "M")

# rank
Fd_TAI_F.rank <- 
  TI.preplot(Fd_PES_F.rank) %>%
  dplyr::mutate(Sex = "F")
Fd_TAI_M.rank <- 
  TI.preplot(Fd_PES_M.rank) %>%
  dplyr::mutate(Sex = "M")
Fs_TAI_F.rank <- 
  TI.preplot(Fs_PES_F.rank) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M.rank <- 
  TI.preplot(Fs_PES_M.rank) %>%
  dplyr::mutate(Sex = "M")

# rlog
Fd_TAI_F.rlog <- 
  TI.preplot(Fd_PES_F.rlog) %>%
  dplyr::mutate(Sex = "F")
Fd_TAI_M.rlog <- 
  TI.preplot(Fd_PES_M.rlog) %>%
  dplyr::mutate(Sex = "M")
Fs_TAI_F.rlog <- 
  TI.preplot(Fs_PES_F.rlog) %>%
  dplyr::mutate(Sex = "F")
Fs_TAI_M.rlog <- 
  TI.preplot(Fs_PES_M.rlog) %>%
  dplyr::mutate(Sex = "M")
plot_tissue_Fd_with_gametes <- function(preplot_out){
  plot_out <- 
    preplot_out %>%
    ggplot2::ggplot(
      aes(
        x = interaction(Tissue,Sex),
        y = TAI,
        group = Tissue,
        colour = Sex
        )
      ) +
    ggplot2::geom_point(
      size = 4, 
      position = ggplot2::position_dodge(width=0.9)) +
    ggplot2::geom_errorbar(
      aes(ymin = TAI - sd, ymax = TAI + sd),
      width = 0.1,
      position = ggplot2::position_dodge(width=0.9)) +
    ggplot2::geom_text(
      aes(label = Tissue),
      vjust = -0.9,
      angle = 90,
      position = ggplot2::position_dodge(width = 0.9),
      colour = "black") +
    ggplot2::scale_colour_manual(
      values = c("#00A087FF", "#E64B35FF", "#4DBBD5FF"), 
      guide = "none") +
    ggplot2::labs(
      x = "Sex",
      title = "Fucus distichus \n(matSP)",
      colour = NULL
    ) +
    ggplot2::theme_classic()
  return(plot_out)
}
Fd_combined_matSP <- 
  base::rbind(Fd_TAI_F.sqrt, Fd_TAI_M.sqrt) %>% 
  base::rbind(dplyr::mutate(Fd_TAI_matSP.sqrt, Sex = "Mixed"))
Fd_combined_matSP$Sex <- base::factor(Fd_combined_matSP$Sex, c("Mixed", "F", "M"))
Fd_combined_matSP %>%
  plot_tissue_Fd_with_gametes() + 
  ggplot2::labs(subtitle = "sqrt(TPM)", x = "Samples") +
  ggplot2::scale_x_discrete(breaks = 1)

  # ggplot2::scale_x_discrete(breaks = levels(Fd_combined_matSP$Sex))
# ggplot2::ggsave(filename = "Fd_TAI_tissue_gamete.pdf", device = "pdf", width = 2.2, height = 3)

Fs_combined_matSP <- base::rbind(Fs_TAI_F.sqrt, Fs_TAI_M.sqrt) %>% 
  base::rbind(Fs_TAI_F_matSP.sqrt) %>%
  base::rbind(Fs_TAI_M_matSP.sqrt)
Fs_combined_matSP$Tissue <- base::factor(Fs_combined_matSP$Tissue, c("holdfast", "reptip", "stipe", "vegtip", "gamete"))
Fs_combined_matSP %>%
  plot_tissue_Fs() + 
  ggplot2::labs(subtitle = "sqrt(TPM)", x = "Samples") +
  ggplot2::scale_x_discrete(breaks = 1)

# ggplot2::ggsave(filename = "Fs_TAI_tissue_gamete.pdf", device = "pdf", width = 4, height = 3)

Mean expression of different PS

We check whether newer genes are restricted in expression during the waist of the hourglass. Or the older genes are upregulated.

library(tidyverse)
library(myTAI)

Load PES

Non-transformed

Fd_PES <-
  readr::read_csv(file = "data/Fd_PES.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 7953 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (9): PS, gamete, S1, S2, S3, S4, S4.5, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Fs_PES <-
  readr::read_csv(file = "data/Fs_PES.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 8291 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (8): PS, gamete, S1, S2, S3, S4, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

sqrt-tranformed

Fd_PES.sqrt <-
  readr::read_csv(file = "data/Fd_PES.sqrt.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 7953 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (9): PS, gamete, S1, S2, S3, S4, S4.5, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Fs_PES.sqrt <-
  readr::read_csv(file = "data/Fs_PES.sqrt.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 8291 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (8): PS, gamete, S1, S2, S3, S4, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

log2-tranformed

Fd_PES.log2 <-
  readr::read_csv(file = "data/Fd_PES.log2.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 7953 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (9): PS, gamete, S1, S2, S3, S4, S4.5, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Fs_PES.log2 <-
  readr::read_csv(file = "data/Fs_PES.log2.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 8291 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (8): PS, gamete, S1, S2, S3, S4, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

rank-tranformed

Fd_PES.rank <-
  readr::read_csv(file = "data/Fd_PES.rank.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 7953 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (9): PS, gamete, S1, S2, S3, S4, S4.5, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Fs_PES.rank <-
  readr::read_csv(file = "data/Fs_PES.rank.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 8291 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (8): PS, gamete, S1, S2, S3, S4, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

rlog-tranformed

Fd_PES.rlog <-
  readr::read_csv(file = "data/Fd_PES.rlog.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 7953 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (9): PS, gamete, S1, S2, S3, S4, S4.5, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Fs_PES.rlog <-
  readr::read_csv(file = "data/Fs_PES.rlog.csv") %>% 
  dplyr::select(-c("gamete", "matSP")) 
## Rows: 8291 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (8): PS, gamete, S1, S2, S3, S4, S5, matSP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Average expression of all genes

To have an understanding of the variance in the median calculation, I will obtain the standard deviation via bootstrapping.

plot_relative_expression <- function(PES, average = "mean", plot = T){
  averaging_function <- base::match.fun(average)        

  PES_in <- 
    PES %>%
    tidyr::pivot_longer(!c(PS, GeneID), values_to = "Expression", names_to = "Stage") %>%
    dplyr::group_by(PS,Stage) %>%
    dplyr::summarise(mean_expression = averaging_function(Expression),
                     n_genes = n())
  
  max_expression <- 
    PES_in %>%
    dplyr::group_by(PS) %>%
    dplyr::summarise(max_expression = max(mean_expression),
                     min_expression = min(mean_expression))

  # normalise expression between 0 and 1
  PES_relative_expression <-
    left_join(PES_in, max_expression, by = "PS") %>%
    dplyr::mutate(relative_expression = (mean_expression-min_expression)/(max_expression-min_expression))
  
  PES_relative_expression$Stage <- 
    factor(PES_relative_expression$Stage, levels = unique(colnames(PES)[-c(1,2)]))
  
  # return if plotting is not desired.
  if(!plot){return(PES_relative_expression)}
  
  PES_relative_expression <-
    dplyr::mutate(PES_relative_expression, PS_category = case_when(
      PS < 7 ~ "ancient genes",
      TRUE~ "young genes"
    )) %>%
    dplyr::mutate(PS_representativeness = case_when(
      n_genes < 1000 ~ "low",
      TRUE~ "high"
    ))
  
  ggplot2::ggplot(PES_relative_expression,
                  aes(x = Stage, 
                      y = relative_expression, 
                      group = PS, 
                      # linetype = PS_representativeness,
                      # linewidth = log2(n_genes),
                      colour = factor(PS))) +
  # ggplot2::geom_line(alpha = 0.5) +
  ggplot2::geom_line(linewidth = 2, linetype = "twodash") +
  ggplot2::facet_wrap(vars(PS_category)) +
  ggplot2::scale_color_brewer(palette="Dark2") +
  ggplot2::labs(y = "relative expression", colour = "PS")
}
plot_relative_expression(Fs_PES) + ggplot2::labs(title = "Fs", subtitle = "TPM")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fd_PES) + ggplot2::labs(title = "Fd", subtitle = "TPM")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fs_PES.sqrt) + ggplot2::labs(title = "Fs", subtitle = "sqrt(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fd_PES.sqrt) + ggplot2::labs(title = "Fd", subtitle = "sqrt(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fs_PES.log2) + ggplot2::labs(title = "Fs", subtitle = "log2(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fd_PES.log2) + ggplot2::labs(title = "Fd", subtitle = "log2(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fs_PES.rank) + ggplot2::labs(title = "Fs", subtitle = "rank(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fd_PES.rank) + ggplot2::labs(title = "Fd", subtitle = "rank(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fs_PES.rlog) + ggplot2::labs(title = "Fs", subtitle = "rlog(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_relative_expression(Fd_PES.rlog) + ggplot2::labs(title = "Fd", subtitle = "rlog(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

Thus, we see a general pattern that the hourglass pattern comes from the down-regulation of younger genes rather than an upregulation of older genes, especially for the sqrt and log2 transformed data. The dataset with ranked show that if we emphasise genes with low expression in the resulting TAI profile, we see increased expression of older genes in the mid-embryo. However, this transformation is non-standard and it is unclear what is even meant by “relative expression” in the context of the rank data.

Plot mean CI

Second attempt.

plot_PS_CImean <- function(PES){
  PES_in <- 
    PES %>%
    tidyr::pivot_longer(!c(PS, GeneID), values_to = "Expression", names_to = "Stage") %>%
    dplyr::group_by(PS,Stage) %>%
    dplyr::summarise(ggplot2::mean_cl_boot(Expression))
  
  PES_in$Stage <- 
    factor(PES_in$Stage, levels = unique(colnames(PES)[-c(1,2)]))
  
  PES_in %>%
    ggplot2::ggplot(aes(x = Stage, y = y, colour = factor(PS), group = PS)) +
    ggplot2::geom_ribbon(aes(ymin = ymin, ymax = ymax), 
              alpha = 0.1, color = NA) +
    ggplot2::geom_line(size = 1) + 
    ggplot2::scale_color_brewer(palette="Dark2") +
    ggplot2::facet_wrap(vars(PS), scales = "free_y") +
    ggplot2::labs(y = "Mean expression level", colour = "PS")
  }
plot_PS_CImean(Fs_PES) + ggplot2::labs(title = "Fs", subtitle = "TPM")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_PS_CImean(Fd_PES) + ggplot2::labs(title = "Fd", subtitle = "TPM")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_PS_CImean(Fs_PES.sqrt) + ggplot2::labs(title = "Fs", subtitle = "sqrt(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_PS_CImean(Fd_PES.sqrt) + ggplot2::labs(title = "Fd", subtitle = "sqrt(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_PS_CImean(Fs_PES.log2) + ggplot2::labs(title = "Fs", subtitle = "log2(TPM)", y = "mean log2(TPM+1)") + ggplot2::theme_classic()
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

# ggplot2::ggsave(filename = "Fs_PS_embryo.pdf", device = "pdf", width = 6, height = 4)
plot_PS_CImean(Fd_PES.log2) + ggplot2::labs(title = "Fd", subtitle = "log2(TPM)", y = "mean log2(TPM+1)") + ggplot2::theme_classic()
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

# ggplot2::ggsave(filename = "Fd_PS_embryo.pdf", device = "pdf", width = 6, height = 4)

plot_PS_CImean(Fs_PES.rank) + ggplot2::labs(title = "Fs", subtitle = "rank(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_PS_CImean(Fd_PES.rank) + ggplot2::labs(title = "Fd", subtitle = "rank(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_PS_CImean(Fs_PES.rlog) + ggplot2::labs(title = "Fs", subtitle = "rlog(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

plot_PS_CImean(Fd_PES.rlog) + ggplot2::labs(title = "Fd", subtitle = "rlog(TPM)")
## `summarise()` has grouped output by 'PS'. You can override using the `.groups`
## argument.

So what we see with more details is that the older genes are expressed with more variance and the youngest genes are completely downregulated in the ‘phylotypic stage’. The spread is based on 95% confidence interval.

Get session info.

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.2.2 (2022-10-31)
##  os       macOS Big Sur ... 10.16
##  system   x86_64, darwin17.0
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Europe/Berlin
##  date     2024-03-21
##  pandoc   2.17.1.1 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package      * version    date (UTC) lib source
##  backports      1.4.1      2021-12-13 [1] CRAN (R 4.2.0)
##  base64enc      0.1-3      2015-07-28 [1] CRAN (R 4.2.0)
##  bit            4.0.5      2022-11-15 [1] CRAN (R 4.2.0)
##  bit64          4.0.5      2020-08-30 [1] CRAN (R 4.2.0)
##  bslib          0.5.0      2023-06-09 [1] CRAN (R 4.2.0)
##  cachem         1.0.8      2023-05-01 [1] CRAN (R 4.2.0)
##  callr          3.7.3      2022-11-02 [1] CRAN (R 4.2.0)
##  checkmate      2.2.0      2023-04-27 [1] CRAN (R 4.2.0)
##  cli            3.6.1      2023-03-23 [1] CRAN (R 4.2.0)
##  cluster        2.1.4      2022-08-22 [1] CRAN (R 4.2.2)
##  codetools      0.2-19     2023-02-01 [1] CRAN (R 4.2.0)
##  colorspace     2.1-0      2023-01-23 [1] CRAN (R 4.2.0)
##  crayon         1.5.2      2022-09-29 [1] CRAN (R 4.2.0)
##  data.table     1.14.8     2023-02-17 [1] CRAN (R 4.2.0)
##  devtools       2.4.5      2022-10-11 [1] CRAN (R 4.2.0)
##  digest         0.6.33     2023-07-07 [1] CRAN (R 4.2.0)
##  dplyr        * 1.1.2      2023-04-20 [1] CRAN (R 4.2.0)
##  ellipsis       0.3.2      2021-04-29 [1] CRAN (R 4.2.0)
##  evaluate       0.21       2023-05-05 [1] CRAN (R 4.2.0)
##  fansi          1.0.4      2023-01-22 [1] CRAN (R 4.2.0)
##  farver         2.1.1      2022-07-06 [1] CRAN (R 4.2.0)
##  fastmap        1.1.1      2023-02-24 [1] CRAN (R 4.2.0)
##  fitdistrplus   1.1-11     2023-04-25 [1] CRAN (R 4.2.0)
##  forcats      * 1.0.0      2023-01-29 [1] CRAN (R 4.2.0)
##  foreach        1.5.2      2022-02-02 [1] CRAN (R 4.2.0)
##  foreign        0.8-84     2022-12-06 [1] CRAN (R 4.2.0)
##  Formula        1.2-5      2023-02-24 [1] CRAN (R 4.2.0)
##  fs             1.6.3      2023-07-20 [1] CRAN (R 4.2.0)
##  generics       0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
##  ggplot2      * 3.4.3      2023-08-14 [1] CRAN (R 4.2.0)
##  ggsci          3.0.0      2023-03-08 [1] CRAN (R 4.2.0)
##  glue           1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
##  gridExtra      2.3        2017-09-09 [1] CRAN (R 4.2.0)
##  gtable         0.3.3      2023-03-21 [1] CRAN (R 4.2.0)
##  highr          0.10       2022-12-22 [1] CRAN (R 4.2.0)
##  Hmisc          5.1-0      2023-05-08 [1] CRAN (R 4.2.0)
##  hms            1.1.3      2023-03-21 [1] CRAN (R 4.2.0)
##  htmlTable      2.4.1      2022-07-07 [1] CRAN (R 4.2.0)
##  htmltools      0.5.7      2023-11-03 [1] CRAN (R 4.2.0)
##  htmlwidgets    1.6.4      2023-12-06 [1] CRAN (R 4.2.0)
##  httpuv         1.6.11     2023-05-11 [1] CRAN (R 4.2.0)
##  iterators      1.0.14     2022-02-05 [1] CRAN (R 4.2.0)
##  jquerylib      0.1.4      2021-04-26 [1] CRAN (R 4.2.0)
##  jsonlite       1.8.7      2023-06-29 [1] CRAN (R 4.2.0)
##  knitr          1.43       2023-05-25 [1] CRAN (R 4.2.0)
##  labeling       0.4.2      2020-10-20 [1] CRAN (R 4.2.0)
##  later          1.3.1      2023-05-02 [1] CRAN (R 4.2.0)
##  lattice        0.21-8     2023-04-05 [1] CRAN (R 4.2.0)
##  lifecycle      1.0.3      2022-10-07 [1] CRAN (R 4.2.0)
##  lubridate    * 1.9.2      2023-02-10 [1] CRAN (R 4.2.0)
##  magrittr       2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
##  MASS           7.3-60     2023-05-04 [1] CRAN (R 4.2.0)
##  Matrix         1.6-0      2023-07-08 [1] CRAN (R 4.2.2)
##  memoise        2.0.1      2021-11-26 [1] CRAN (R 4.2.0)
##  mime           0.12       2021-09-28 [1] CRAN (R 4.2.0)
##  miniUI         0.1.1.1    2018-05-18 [1] CRAN (R 4.2.0)
##  munsell        0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
##  myTAI        * 1.0.1.9000 2023-08-18 [1] Github (drostlab/myTAI@59a95dd)
##  nnet           7.3-19     2023-05-03 [1] CRAN (R 4.2.0)
##  pillar         1.9.0      2023-03-22 [1] CRAN (R 4.2.0)
##  pkgbuild       1.4.0      2022-11-27 [1] CRAN (R 4.2.0)
##  pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
##  pkgload        1.3.2.1    2023-07-08 [1] CRAN (R 4.2.0)
##  prettyunits    1.1.1      2020-01-24 [1] CRAN (R 4.2.0)
##  processx       3.8.2      2023-06-30 [1] CRAN (R 4.2.0)
##  profvis        0.3.8      2023-05-02 [1] CRAN (R 4.2.0)
##  promises       1.2.0.1    2021-02-11 [1] CRAN (R 4.2.0)
##  ps             1.7.5      2023-04-18 [1] CRAN (R 4.2.0)
##  purrr        * 1.0.2      2023-08-10 [1] CRAN (R 4.2.0)
##  R6             2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
##  RColorBrewer   1.1-3      2022-04-03 [1] CRAN (R 4.2.0)
##  Rcpp           1.0.11     2023-07-06 [1] CRAN (R 4.2.0)
##  readr        * 2.1.4      2023-02-10 [1] CRAN (R 4.2.0)
##  remotes        2.4.2      2021-11-30 [1] CRAN (R 4.2.0)
##  rlang          1.1.1      2023-04-28 [1] CRAN (R 4.2.0)
##  rmarkdown      2.22       2023-06-01 [1] CRAN (R 4.2.0)
##  rpart          4.1.19     2022-10-21 [1] CRAN (R 4.2.2)
##  rstudioapi     0.14       2022-08-22 [1] CRAN (R 4.2.0)
##  sass           0.4.6      2023-05-03 [1] CRAN (R 4.2.0)
##  scales         1.2.1      2022-08-20 [1] CRAN (R 4.2.0)
##  sessioninfo    1.2.2      2021-12-06 [1] CRAN (R 4.2.0)
##  shiny          1.7.4      2022-12-15 [1] CRAN (R 4.2.0)
##  stringi        1.7.12     2023-01-11 [1] CRAN (R 4.2.0)
##  stringr      * 1.5.0      2022-12-02 [1] CRAN (R 4.2.0)
##  survival       3.5-5      2023-03-12 [1] CRAN (R 4.2.2)
##  tibble       * 3.2.1      2023-03-20 [1] CRAN (R 4.2.0)
##  tidyr        * 1.3.0      2023-01-24 [1] CRAN (R 4.2.0)
##  tidyselect     1.2.0      2022-10-10 [1] CRAN (R 4.2.0)
##  tidyverse    * 2.0.0      2023-02-22 [1] CRAN (R 4.2.0)
##  timechange     0.2.0      2023-01-11 [1] CRAN (R 4.2.0)
##  tzdb           0.4.0      2023-05-12 [1] CRAN (R 4.2.0)
##  urlchecker     1.0.1      2021-11-30 [1] CRAN (R 4.2.0)
##  usethis        2.2.0      2023-06-06 [1] CRAN (R 4.2.0)
##  utf8           1.2.3      2023-01-31 [1] CRAN (R 4.2.0)
##  vctrs          0.6.3      2023-06-14 [1] CRAN (R 4.2.0)
##  vroom          1.6.3      2023-04-28 [1] CRAN (R 4.2.0)
##  withr          2.5.0      2022-03-03 [1] CRAN (R 4.2.0)
##  xfun           0.39       2023-04-20 [1] CRAN (R 4.2.0)
##  xtable         1.8-4      2019-04-21 [1] CRAN (R 4.2.0)
##  yaml           2.3.7      2023-01-23 [1] CRAN (R 4.2.0)
## 
##  [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────